In [2]:
%matplotlib inline
from __future__ import division
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import cPickle as pickle
import gzip
ERCOT provides hourly data on the aggregate level of wind power across the state from 2007-2015. The data files also contain total ERCOT load in MW and the total installed capacity of wind turbines (MW). With this data we can see how ERCOT demand evolved over the 8 years, how wind generation increased, and the change in how much of the total load was met with wind.
In [2]:
filename = 'ERCOT wind data.csv'
path = '../Clean Data'
fullpath = os.path.join(path, filename)
In [3]:
ercot = pd.read_csv(fullpath, index_col=0)
In [132]:
ercot.head()
Out[132]:
Distribution of wind output as % of load for the whole dataset.
In [5]:
sns.distplot(ercot['Wind Output, % of Load'])
Out[5]:
How does this distribution change over time? Need to set a year column.
In [5]:
#Define year and month for the visualizations below
ercot['month'] = pd.to_datetime(ercot.index).month
ercot['year'] = pd.to_datetime(ercot.index).year
The figures below shows a clear shift in the amount of generation from wind over time. In 2007 it was rarely above 10%, and the lowest bin is ~40% of the total. The distribution flattens considerably by 2010, and by 2013 starts creeping above 30% of total load.
In [7]:
sns.violinplot(x='year', y='Wind Output, % of Load', data=ercot, cut=0, linewidth=1.25)
# path = os.path.join('..', 'Midterm Report', 'Wind violin plot.svg')
# plt.savefig(path)
Out[7]:
In [7]:
g = sns.FacetGrid(ercot, col='year', col_wrap=3)
g.map(sns.distplot, 'Wind Output, % of Load')
Out[7]:
The figures below look at total ERCOT load.
This figure shows that average load across an entire year has been increasing over time.
In [54]:
sns.pointplot('year', 'ERCOT Load, MW', data=ercot)
Out[54]:
In [41]:
sns.pointplot('month', 'ERCOT Load, MW', data=ercot, hue='year',
palette='Blues', scale=0.5)
Out[41]:
Interesting. There is far less of a monthly pattern to the wind generation.
In [10]:
fig, (ax1, ax2) = plt.subplots(1,2,figsize=(10,4))
sns.pointplot('month', 'ERCOT Load, MW', data=ercot, hue='year',
palette='Blues', scale=0.5, ax=ax1)
sns.pointplot('month', 'Total Wind Output, MW', data=ercot, hue='year',
palette='Greens', scale=0.5, ax=ax2)
ax2.legend(bbox_to_anchor=(1, 1), loc=2, title='year')
plt.tight_layout()
path = os.path.join('..', 'Midterm Report', 'Monthly ERCOT load and wind2.svg')
plt.savefig(path, bbox_inches='tight')
Out[10]:
Out[10]:
Out[10]:
There's a definite dip in wind output in the late summer/early fall (July-Sept). The dip has been getting shallower over time though. Oddly, there was a huge drop in wind output in March 2015.
In [49]:
sns.pointplot('month', 'Wind Output, % of Installed', data=ercot, hue='year',
palette='Blues', scale=0.5)
plt.legend(bbox_to_anchor=(1, 1), loc=2, title='year')
Out[49]:
Out[49]:
With the exception of 2007, almost all capacity additions happen in the second half of the year.
In [8]:
sns.pointplot('month', 'Total Wind Installed, MW', data=ercot, hue='year',
palette='Blues', scale=0.5)
plt.legend(bbox_to_anchor=(1, 1), loc=2, title='year')
path = os.path.join('..', 'Midterm Report', 'Monthly ERCOT wind capacity.svg')
plt.savefig(path, bbox_inches='tight')
Out[8]:
Out[8]:
These distribution plots show hourly ERCOT load for each year. It's been slowly creeping up, with less of a peak around 30GW.
In [42]:
g = sns.FacetGrid(ercot, col='year', col_wrap=3)
g.map(sns.distplot,'ERCOT Load, MW')
g.set_xticklabels(rotation=30)
Out[42]:
Out[42]:
In [3]:
filename = 'EIA 860.pkl'
path = '../Clean Data'
fullpath = os.path.join(path, filename)
eia860 = pickle.load(open(fullpath, 'rb'))
In [16]:
eia860.keys()
Out[16]:
In [4]:
all860 = pd.concat(eia860)
In [5]:
filename = 'All EIA 860.csv'
path = '../Clean Data'
fullpath = os.path.join(path, filename)
all860.to_csv(fullpath)
In [4]:
all860.head()
Out[4]:
In [5]:
all860['year'] = all860.index.get_level_values(0)
In [6]:
all860.reset_index(drop=True, inplace=True)
In [33]:
sns.boxplot(x='year', y='NAMEPLATE_CAPACITY(MW)', data=all860)
Out[33]:
In [30]:
sns.violinplot(x='year', y='NAMEPLATE_CAPACITY(MW)', data=all860, cut=0)
Out[30]:
In [7]:
df = eia860[2007]
In [12]:
df = df.merge(eia923[2007][['PLANT_ID', 'fuel', 'NET_GENERATION_(MEGAWATTHOURS)']], on='PLANT_ID')
In [103]:
df.groupby('fuel').sum()['NAMEPLATE_CAPACITY(MW)'].plot(kind='bar')
plt.ylabel('Nameplate Capacity in 2007 (MW)')
Out[103]:
Out[103]:
Of the fuels shown the plot above, we only need to look at:
In [13]:
df = df[df['fuel'].isin(['DFO', 'LIG', 'NG', 'PC', 'SUB'])]
In [105]:
df.head()
Out[105]:
In [14]:
def CF(row):
possible_gen = row['NAMEPLATE_CAPACITY(MW)'] * 8760
cf = row['NET_GENERATION_(MEGAWATTHOURS)'] / possible_gen
return cf
df['Capacity Factor'] = df.apply(CF, axis=1)
In [15]:
df.head()
Out[15]:
Apparently one of the plants has something weird going on - tiny nameplate capacity and a CF of 11. And another one has negative net generation?
In [124]:
g = sns.FacetGrid(df, hue='fuel', size=5, aspect=1.5)
g.map(plt.scatter, 'NAMEPLATE_CAPACITY(MW)', 'Capacity Factor')
plt.legend()
Out[124]:
Out[124]:
The figure below can be used in the midterm report.
In [17]:
g = sns.FacetGrid(df, hue='fuel', size=5, aspect=1.5, ylim=(-0.05,1))
g.map(plt.scatter, 'NAMEPLATE_CAPACITY(MW)', 'Capacity Factor')
plt.title('2007 ERCOT Fossil Fuel Power Plants')
plt.legend(labels=['Subbituminous coal', 'Lignite coal', 'Natural gas', 'Petroleum coke', 'Diesel fuel'])
plt.tight_layout()
path = os.path.join('Midterm figures', 'ERCOT power plants 2007.svg')
plt.savefig(path)
Out[17]:
Out[17]:
Out[17]:
In [ ]:
In [6]:
filename = 'EIA 923.pkl'
path = '../Clean Data'
fullpath = os.path.join(path, filename)
eia923 = pickle.load(open(fullpath, 'rb'))
In [7]:
all923 = pd.concat(eia923)
filename = 'All EIA 923.csv'
path = '../Clean Data'
fullpath = os.path.join(path, filename)
all860.to_csv(fullpath)
In [35]:
eia923.keys()
Out[35]:
In [39]:
[(idx, x) for idx, x in enumerate(eia923[2007].columns)]
Out[39]:
Define a list of fuel types
In [45]:
fuels = [text[29:] for text in eia923[2007].columns[44:70]]
In [46]:
fuels
Out[46]:
In [51]:
eia923[2007].iloc[0,44:70].idxmax()[29:]
Out[51]:
In [10]:
def top_fuel(row):
#Fraction of largest fuel for electric heat input
try:
fuel = row.iloc[44:70].idxmax()[29:]
except:
print row.iloc[44:70]
return None
return fuel
In [68]:
top_fuel(eia923[2007].loc[0,:])
Out[68]:
In [76]:
eia923[2007].apply(top_fuel, axis=1).value_counts()
Out[76]:
In [82]:
eia923[2011].apply(top_fuel, axis=1).value_counts()
Out[82]:
In [11]:
eia923[2007]['fuel'] = eia923[2007].apply(top_fuel, axis=1)
In [84]:
eia923[2007]
Out[84]:
In [88]:
eia923[2007].groupby('fuel').sum()['NET_GENERATION_(MEGAWATTHOURS)'].plot(kind='bar')
plt.ylabel('Net Generation in 2007 (MWh)')
Out[88]:
Out[88]:
In [90]:
eia923[2009]['fuel'] = eia923[2009].apply(top_fuel, axis=1)
eia923[2009].groupby('fuel').sum()['NET_GENERATION_(MEGAWATTHOURS)'].plot(kind='bar')
plt.ylabel('Net Generation in 2009 (MWh)')
Out[90]:
Out[90]:
In [9]:
# load a zipped pickle file
# from http://stackoverflow.com/questions/18474791/decreasing-the-size-of-cpickle-objects
def load_zipped_pickle(filename):
with gzip.open(filename, 'rb') as f:
loaded_object = pickle.load(f)
return loaded_object
In [10]:
filename = 'EPA hourly dictionary.pgz'
path = '../Clean Data'
fullpath = os.path.join(path, filename)
epaDict = load_zipped_pickle(fullpath)
In [11]:
df = epaDict['2015 July-Dec']
set(df['PLANT_ID'])
Out[11]:
In [12]:
df_temp = df[df['PLANT_ID'].isin([127, 298, 3439])].fillna(0)
In [13]:
df_temp.head()
Out[13]:
In [17]:
g = sns.FacetGrid(df_temp, col='PLANT_ID', sharey=False)
g.map(plt.plot, 'DATETIME', 'GROSS LOAD (MW)')
g.set_xticklabels(rotation=30)
path = os.path.join('Midterm figures', 'Sample hourly load.svg')
plt.savefig(path)
Out[17]:
Out[17]:
In [ ]: